library(mixOmics)
library(scMerge) ## for combing sce's
Data
## load the CellBench data from GitHub
data = "https://github.com/LuyiTian/CellBench_data/blob/master/data/sincell_with_class.RData?raw=true"
load(url(data))
## create a combined sce
sce.list = c("CELseq2" = sce_sc_CELseq2_qc,
"Dropseq" = sce_sc_Dropseq_qc,
"10X" = sce_sc_10x_qc)
## merge the sce's into one, keeping common genes and the cell_line colData
sce = scMerge::sce_cbind(sce_list = sce.list, cut_off_batch = 0,
cut_off_overall = 0, method = "intersect",
batch_names = names(sce.list),
colData_names = "cell_line")
Default model
## load the wrapper function - change to your rown directory
source("../../wrapper/mixOmics_mint.R")
## check if there are duplicate cell names
sum(duplicated(colnames(sce)))
## [1] 225
## ensure there are no duplicate cell names
colnames(sce) = make.unique(colnames(sce))
## run the wrapper and get both sce and mint.splsda outputs
sce.mint = mixOmics_mint(sce, colData.batch = "batch", colData.class = "cell_line",
keepX = c(50,50), ncomp = 2, output = "both", print.log = FALSE)
## get the mint object
mint = sce.mint$mint
## plot the mint.splsda components for both batches and all cell types
plotIndiv(mint$splsda, title = NULL, subtitle = "MINT sPLSDA",
legend = TRUE, legend.title = "Cell Line", study = "global")

## plot the mint.splsda components for all cell types for individual batches
plotIndiv(mint$splsda, title = "Individual Studies", subtitle = "MINT sPLSDA",
legend = TRUE, legend.title = "Cell Line", study = "CELseq2")

## plot the mint.splsda components for all cell types, separated by batches
plotIndiv(mint$splsda, title = "All Studies",
legend = TRUE, legend.title = "Cell Line", study = "all.partial")

Tuned model
## takes ~ 90 sec
## run the wrapper and get both tuned sce and mint.splsda outputs - start with a coarse grid
sce.mint.tuned = mixOmics_mint(sce, colData.batch = "batch", colData.class = "cell_line",
ncomp = 2, tune.keepX = seq(5,65,20), output = "both")
## get the tuner outputs
tuned_mint = sce.mint.tuned$mint
## plot the error rate over the assessed number of variables
plot(tuned_mint$tune)

## output the optimum number of variables
tuned_mint$tune$choice.keepX
## comp 1 comp 2
## 45 25
refine the search grid
fine.keepX = c(seq(35,47,3), ## for comp 1
seq(15,27, 3)) ## for comp 2
## run the wrapper with and tune over the fine grid
sce.mint.fine.tuned = mixOmics_mint(sce, colData.batch = "batch", colData.class = "cell_line",
ncomp = 2, tune.keepX = fine.keepX, output = "both")
More visualisations using mint.splsda object
## clustered image map
cim(mint$splsda, comp = c(1,2), margins=c(10,5),
row.sideColors = color.mixo(as.numeric(mint$splsda$Y)), row.names = FALSE,
title = 'MINT sPLS-DA', save = "png", name.save = "heatmap")
knitr::include_graphics("heatmap.png")

## loadings for each signature genes in each study
plotLoadings(mint$splsda, contrib='max', method = 'mean', comp=2,
study='all.partial', legend=FALSE, title=NULL,
subtitle = unique(mint$splsda$study) )

## correlation circle plot
plotVar(mint$splsda, cex = 3)

## roc curves for each batch
auroc(mint$splsda, roc.comp = 1, roc.study='CELseq2')

## $Comp1
## AUC p-value
## H1975 vs Other(s) 0.6967 2.863e-08
## H2228 vs Other(s) 1.0000 0.000e+00
## HCC827 vs Other(s) 0.7745 1.105e-12
##
## $Comp2
## AUC p-value
## H1975 vs Other(s) 0.9936 0
## H2228 vs Other(s) 1.0000 0
## HCC827 vs Other(s) 0.9999 0
Visualisation using sce object
## visualise the global components from sce object
df.global = as.data.frame(reducedDim(sce.mint$sce,"mint_comps_global"))
ggplot(df.global, aes(x=df.global[,1], y=df.global[,2],
col = colData(sce.mint$sce)[["cell_line"]])) + geom_point() +
labs(x = "Component 1", y = "Component 2",title = "MINT components for all cells") + guides(col=guide_legend(title="Cell Line"))

## visualise each batch's components from sce object
## see all available reducedDims
reducedDims(sce.mint$sce)
## List of length 4
## names(4): mint_comps_global mint_comps_10X mint_comps_CELseq2 mint_comps_Dropseq
## we will choose the mint_comps_Dropseq
df.dropseq = as.data.frame(reducedDim(sce.mint$sce,"mint_comps_Dropseq"))
## their respective cell lines
ggplot(df.dropseq, aes(x=df.dropseq[,1], y=df.dropseq[,2],
col = colData(sce.mint$sce)[["cell_line"]])) + geom_point(na.rm = TRUE) +
labs(x = "Component 1", y = "Component 2", title = "MINT components for Drop-seq cells") + guides(col=guide_legend(title="Cell Line"))

Markers
## look at marker genes
rownames(sce.mint$sce)[rowData(sce.mint$sce)[["mint_marker"]]]
## [1] "ENSG00000229391" "ENSG00000187134" "ENSG00000174021"
## [4] "ENSG00000121858" "ENSG00000163131" "ENSG00000142937"
## [7] "ENSG00000154639" "ENSG00000163739" "ENSG00000204287"
## [10] "ENSG00000137673" "ENSG00000100097" "ENSG00000204257"
## [13] "ENSG00000065978" "ENSG00000104388" "ENSG00000147604"
## [16] "ENSG00000179344" "ENSG00000106366" "ENSG00000160352"
## [19] "ENSG00000198832" "ENSG00000171863" "ENSG00000198431"
## [22] "ENSG00000206503" "ENSG00000154582" "ENSG00000166681"
## [25] "ENSG00000169715" "ENSG00000166710" "ENSG00000166908"
## [28] "ENSG00000023839" "ENSG00000132432" "ENSG00000108561"
## [31] "ENSG00000134339" "ENSG00000128739" "ENSG00000231389"
## [34] "ENSG00000223865" "ENSG00000104368" "ENSG00000154978"
## [37] "ENSG00000105193" "ENSG00000135677" "ENSG00000135506"
## [40] "ENSG00000167468" "ENSG00000115919" "ENSG00000122406"
## [43] "ENSG00000135919" "ENSG00000019582" "ENSG00000203875"
## [46] "ENSG00000266891" "ENSG00000111669" "ENSG00000142864"
## [49] "ENSG00000198502" "ENSG00000143947" "ENSG00000204525"
## [52] "ENSG00000151632" "ENSG00000226221" "ENSG00000129991"
## [55] "ENSG00000135446" "ENSG00000140937" "ENSG00000135373"
## [58] "ENSG00000161970" "ENSG00000100867" "ENSG00000132434"
## [61] "ENSG00000181019" "ENSG00000196683" "ENSG00000108518"
## [64] "ENSG00000137970" "ENSG00000065621" "ENSG00000183735"
## [67] "ENSG00000134709" "ENSG00000154640" "ENSG00000214485"
## [70] "ENSG00000177156" "ENSG00000196139" "ENSG00000214548"
## [73] "ENSG00000100979" "ENSG00000100234" "ENSG00000106853"
## [76] "ENSG00000108602" "ENSG00000153317" "ENSG00000260549"
## [79] "ENSG00000234745" "ENSG00000154277" "ENSG00000153310"
## [82] "ENSG00000149557" "ENSG00000148346" "ENSG00000225912"
## [85] "ENSG00000168899" "ENSG00000173432" "ENSG00000167641"
## [88] "ENSG00000249395" "ENSG00000161011" "ENSG00000253706"
## [91] "ENSG00000013297" "ENSG00000090013" "ENSG00000146648"
## [94] "ENSG00000153292" "ENSG00000069482" "ENSG00000182326"
## [97] "ENSG00000117525" "ENSG00000196126" "ENSG00000243859"
## [100] "ENSG00000131203"